{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 3 Layer Network on MNIST"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " I:349 Train-Err:0.108 Train-Acc:1.0"
     ]
    }
   ],
   "source": [
    "import sys, numpy as np\n",
    "from keras.datasets import mnist\n",
    "\n",
    "(x_train, y_train), (x_test, y_test) = mnist.load_data()\n",
    "\n",
    "images, labels = (x_train[0:1000].reshape(1000,28*28) / 255, y_train[0:1000])\n",
    "\n",
    "one_hot_labels = np.zeros((len(labels),10))\n",
    "for i,l in enumerate(labels):\n",
    "    one_hot_labels[i][l] = 1\n",
    "labels = one_hot_labels\n",
    "\n",
    "test_images = x_test.reshape(len(x_test),28*28) / 255\n",
    "test_labels = np.zeros((len(y_test),10))\n",
    "for i,l in enumerate(y_test):\n",
    "    test_labels[i][l] = 1\n",
    "    \n",
    "np.random.seed(1)\n",
    "relu = lambda x:(x>=0) * x # returns x if x > 0, return 0 otherwise\n",
    "relu2deriv = lambda x: x>=0 # returns 1 for input > 0, return 0 otherwise\n",
    "alpha, iterations, hidden_size, pixels_per_image, num_labels = (0.005, 350, 40, 784, 10)\n",
    "\n",
    "weights_0_1 = 0.2*np.random.random((pixels_per_image,hidden_size)) - 0.1\n",
    "weights_1_2 = 0.2*np.random.random((hidden_size,num_labels)) - 0.1\n",
    "\n",
    "for j in range(iterations):\n",
    "    error, correct_cnt = (0.0, 0)\n",
    "    \n",
    "    for i in range(len(images)):\n",
    "        layer_0 = images[i:i+1]\n",
    "        layer_1 = relu(np.dot(layer_0,weights_0_1))\n",
    "        layer_2 = np.dot(layer_1,weights_1_2)\n",
    "\n",
    "        error += np.sum((labels[i:i+1] - layer_2) ** 2)\n",
    "        correct_cnt += int(np.argmax(layer_2) == \\\n",
    "                                        np.argmax(labels[i:i+1]))\n",
    "\n",
    "        layer_2_delta = (labels[i:i+1] - layer_2)\n",
    "        layer_1_delta = layer_2_delta.dot(weights_1_2.T)\\\n",
    "                                    * relu2deriv(layer_1)\n",
    "        weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)\n",
    "        weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)\n",
    "\n",
    "    sys.stdout.write(\"\\r I:\"+str(j)+ \\\n",
    "                     \" Train-Err:\" + str(error/float(len(images)))[0:5] +\\\n",
    "                     \" Train-Acc:\" + str(correct_cnt/float(len(images))))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " Test-Err:0.653 Test-Acc:0.7073\n",
      "\n"
     ]
    }
   ],
   "source": [
    "if(j % 10 == 0 or j == iterations-1):\n",
    "    error, correct_cnt = (0.0, 0)\n",
    "\n",
    "    for i in range(len(test_images)):\n",
    "\n",
    "        layer_0 = test_images[i:i+1]\n",
    "        layer_1 = relu(np.dot(layer_0,weights_0_1))\n",
    "        layer_2 = np.dot(layer_1,weights_1_2)\n",
    "\n",
    "        error += np.sum((test_labels[i:i+1] - layer_2) ** 2)\n",
    "        correct_cnt += int(np.argmax(layer_2) == \\\n",
    "                                        np.argmax(test_labels[i:i+1]))\n",
    "    sys.stdout.write(\" Test-Err:\" + str(error/float(len(test_images)))[0:5] +\\\n",
    "                     \" Test-Acc:\" + str(correct_cnt/float(len(test_images))) + \"\\n\")\n",
    "    print()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " I:0 Train-Err:0.722 Train-Acc:0.537 Test-Err:0.601 Test-Acc:0.6488\n",
      " I:10 Train-Err:0.312 Train-Acc:0.901 Test-Err:0.420 Test-Acc:0.8114\n",
      " I:20 Train-Err:0.260 Train-Acc:0.93 Test-Err:0.414 Test-Acc:0.8111\n",
      " I:30 Train-Err:0.232 Train-Acc:0.946 Test-Err:0.417 Test-Acc:0.8066\n",
      " I:40 Train-Err:0.215 Train-Acc:0.956 Test-Err:0.426 Test-Acc:0.8019\n",
      " I:50 Train-Err:0.204 Train-Acc:0.966 Test-Err:0.437 Test-Acc:0.7982\n",
      " I:60 Train-Err:0.194 Train-Acc:0.967 Test-Err:0.448 Test-Acc:0.7921\n",
      " I:70 Train-Err:0.186 Train-Acc:0.975 Test-Err:0.458 Test-Acc:0.7864\n",
      " I:80 Train-Err:0.179 Train-Acc:0.979 Test-Err:0.466 Test-Acc:0.7817\n",
      " I:90 Train-Err:0.172 Train-Acc:0.981 Test-Err:0.474 Test-Acc:0.7758\n",
      " I:100 Train-Err:0.166 Train-Acc:0.984 Test-Err:0.482 Test-Acc:0.7706\n",
      " I:110 Train-Err:0.161 Train-Acc:0.984 Test-Err:0.489 Test-Acc:0.7686\n",
      " I:120 Train-Err:0.157 Train-Acc:0.986 Test-Err:0.496 Test-Acc:0.766\n",
      " I:130 Train-Err:0.153 Train-Acc:0.99 Test-Err:0.502 Test-Acc:0.7622\n",
      " I:140 Train-Err:0.149 Train-Acc:0.991 Test-Err:0.508 Test-Acc:0.758\n",
      " I:150 Train-Err:0.145 Train-Acc:0.991 Test-Err:0.513 Test-Acc:0.7558\n",
      " I:160 Train-Err:0.141 Train-Acc:0.992 Test-Err:0.518 Test-Acc:0.7553\n",
      " I:170 Train-Err:0.138 Train-Acc:0.992 Test-Err:0.524 Test-Acc:0.751\n",
      " I:180 Train-Err:0.135 Train-Acc:0.995 Test-Err:0.528 Test-Acc:0.7505\n",
      " I:190 Train-Err:0.132 Train-Acc:0.995 Test-Err:0.533 Test-Acc:0.7482\n",
      " I:200 Train-Err:0.130 Train-Acc:0.998 Test-Err:0.538 Test-Acc:0.7464\n",
      " I:210 Train-Err:0.127 Train-Acc:0.998 Test-Err:0.544 Test-Acc:0.7446\n",
      " I:220 Train-Err:0.125 Train-Acc:0.998 Test-Err:0.552 Test-Acc:0.7416\n",
      " I:230 Train-Err:0.123 Train-Acc:0.998 Test-Err:0.560 Test-Acc:0.7372\n",
      " I:240 Train-Err:0.121 Train-Acc:0.998 Test-Err:0.569 Test-Acc:0.7344\n",
      " I:250 Train-Err:0.120 Train-Acc:0.999 Test-Err:0.577 Test-Acc:0.7316\n",
      " I:260 Train-Err:0.118 Train-Acc:0.999 Test-Err:0.585 Test-Acc:0.729\n",
      " I:270 Train-Err:0.117 Train-Acc:0.999 Test-Err:0.593 Test-Acc:0.7259\n",
      " I:280 Train-Err:0.115 Train-Acc:0.999 Test-Err:0.600 Test-Acc:0.723\n",
      " I:290 Train-Err:0.114 Train-Acc:0.999 Test-Err:0.607 Test-Acc:0.7196\n",
      " I:300 Train-Err:0.113 Train-Acc:0.999 Test-Err:0.614 Test-Acc:0.7183\n",
      " I:310 Train-Err:0.112 Train-Acc:0.999 Test-Err:0.622 Test-Acc:0.7165\n",
      " I:320 Train-Err:0.111 Train-Acc:0.999 Test-Err:0.629 Test-Acc:0.7133\n",
      " I:330 Train-Err:0.110 Train-Acc:0.999 Test-Err:0.637 Test-Acc:0.7125\n",
      " I:340 Train-Err:0.109 Train-Acc:1.0 Test-Err:0.645 Test-Acc:0.71\n",
      " I:349 Train-Err:0.108 Train-Acc:1.0 Test-Err:0.653 Test-Acc:0.7073\n"
     ]
    }
   ],
   "source": [
    "import sys, numpy as np\n",
    "from keras.datasets import mnist\n",
    "\n",
    "(x_train, y_train), (x_test, y_test) = mnist.load_data()\n",
    "\n",
    "images, labels = (x_train[0:1000].reshape(1000,28*28) / 255, y_train[0:1000])\n",
    "\n",
    "one_hot_labels = np.zeros((len(labels),10))\n",
    "for i,l in enumerate(labels):\n",
    "    one_hot_labels[i][l] = 1\n",
    "labels = one_hot_labels\n",
    "\n",
    "test_images = x_test.reshape(len(x_test),28*28) / 255\n",
    "test_labels = np.zeros((len(y_test),10))\n",
    "for i,l in enumerate(y_test):\n",
    "    test_labels[i][l] = 1\n",
    "\n",
    "np.random.seed(1)\n",
    "relu = lambda x:(x>=0) * x # returns x if x > 0, return 0 otherwise\n",
    "relu2deriv = lambda x: x>=0 # returns 1 for input > 0, return 0 otherwise\n",
    "alpha, iterations, hidden_size, pixels_per_image, num_labels = (0.005, 350, 40, 784, 10)\n",
    "\n",
    "weights_0_1 = 0.2*np.random.random((pixels_per_image,hidden_size)) - 0.1\n",
    "weights_1_2 = 0.2*np.random.random((hidden_size,num_labels)) - 0.1\n",
    "\n",
    "for j in range(iterations):\n",
    "    error, correct_cnt = (0.0, 0)\n",
    "    \n",
    "    for i in range(len(images)):\n",
    "        layer_0 = images[i:i+1]\n",
    "        layer_1 = relu(np.dot(layer_0,weights_0_1))\n",
    "        layer_2 = np.dot(layer_1,weights_1_2)\n",
    "\n",
    "        error += np.sum((labels[i:i+1] - layer_2) ** 2)\n",
    "        correct_cnt += int(np.argmax(layer_2) == \\\n",
    "                                        np.argmax(labels[i:i+1]))\n",
    "\n",
    "        layer_2_delta = (labels[i:i+1] - layer_2)\n",
    "        layer_1_delta = layer_2_delta.dot(weights_1_2.T)\\\n",
    "                                    * relu2deriv(layer_1)\n",
    "        weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)\n",
    "        weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)\n",
    "\n",
    "    sys.stdout.write(\"\\r I:\"+str(j)+ \\\n",
    "                     \" Train-Err:\" + str(error/float(len(images)))[0:5] +\\\n",
    "                     \" Train-Acc:\" + str(correct_cnt/float(len(images))))\n",
    "    \n",
    "    if(j % 10 == 0 or j == iterations-1):\n",
    "        error, correct_cnt = (0.0, 0)\n",
    "\n",
    "        for i in range(len(test_images)):\n",
    "\n",
    "            layer_0 = test_images[i:i+1]\n",
    "            layer_1 = relu(np.dot(layer_0,weights_0_1))\n",
    "            layer_2 = np.dot(layer_1,weights_1_2)\n",
    "\n",
    "            error += np.sum((test_labels[i:i+1] - layer_2) ** 2)\n",
    "            correct_cnt += int(np.argmax(layer_2) == \\\n",
    "                                            np.argmax(test_labels[i:i+1]))\n",
    "        sys.stdout.write(\" Test-Err:\" + str(error/float(len(test_images)))[0:5] +\\\n",
    "                         \" Test-Acc:\" + str(correct_cnt/float(len(test_images))))\n",
    "        print()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Dropout In Code"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [],
   "source": [
    "i = 0\n",
    "layer_0 = images[i:i+1]\n",
    "dropout_mask = np.random.randint(2,size=layer_1.shape)\n",
    "\n",
    "layer_1 *= dropout_mask * 2\n",
    "layer_2 = np.dot(layer_1, weights_1_2)\n",
    "\n",
    "error += np.sum((labels[i:i+1] - layer_2) ** 2)\n",
    "\n",
    "correct_cnt += int(np.argmax(layer_2) == np.argmax(labels[i+i+1]))\n",
    "\n",
    "layer_2_delta = (labels[i:i+1] - layer_2)\n",
    "layer_1_delta = layer_2_delta.dot(weights_1_2.T) * relu2deriv(layer_1)\n",
    "\n",
    "layer_1_delta *= dropout_mask\n",
    "\n",
    "weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)\n",
    "weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "I:0 Test-Err:0.641 Test-Acc:0.6333 Train-Err:0.891 Train-Acc:0.413\n",
      "I:10 Test-Err:0.458 Test-Acc:0.787 Train-Err:0.472 Train-Acc:0.764\n",
      "I:20 Test-Err:0.415 Test-Acc:0.8133 Train-Err:0.430 Train-Acc:0.809\n",
      "I:30 Test-Err:0.421 Test-Acc:0.8114 Train-Err:0.415 Train-Acc:0.811\n",
      "I:40 Test-Err:0.419 Test-Acc:0.8112 Train-Err:0.413 Train-Acc:0.827\n",
      "I:50 Test-Err:0.409 Test-Acc:0.8133 Train-Err:0.392 Train-Acc:0.836\n",
      "I:60 Test-Err:0.412 Test-Acc:0.8236 Train-Err:0.402 Train-Acc:0.836\n",
      "I:70 Test-Err:0.412 Test-Acc:0.8033 Train-Err:0.383 Train-Acc:0.857\n",
      "I:80 Test-Err:0.410 Test-Acc:0.8054 Train-Err:0.386 Train-Acc:0.854\n",
      "I:90 Test-Err:0.411 Test-Acc:0.8144 Train-Err:0.376 Train-Acc:0.868\n",
      "I:100 Test-Err:0.411 Test-Acc:0.7903 Train-Err:0.369 Train-Acc:0.864\n",
      "I:110 Test-Err:0.411 Test-Acc:0.8003 Train-Err:0.371 Train-Acc:0.868\n",
      "I:120 Test-Err:0.402 Test-Acc:0.8046 Train-Err:0.353 Train-Acc:0.857\n",
      "I:130 Test-Err:0.408 Test-Acc:0.8091 Train-Err:0.352 Train-Acc:0.867\n",
      "I:140 Test-Err:0.405 Test-Acc:0.8083 Train-Err:0.355 Train-Acc:0.885\n",
      "I:150 Test-Err:0.404 Test-Acc:0.8107 Train-Err:0.342 Train-Acc:0.883\n",
      "I:160 Test-Err:0.399 Test-Acc:0.8146 Train-Err:0.361 Train-Acc:0.876\n",
      "I:170 Test-Err:0.404 Test-Acc:0.8074 Train-Err:0.344 Train-Acc:0.889\n",
      "I:180 Test-Err:0.399 Test-Acc:0.807 Train-Err:0.333 Train-Acc:0.892\n",
      "I:190 Test-Err:0.407 Test-Acc:0.8066 Train-Err:0.335 Train-Acc:0.898\n",
      "I:200 Test-Err:0.405 Test-Acc:0.8036 Train-Err:0.347 Train-Acc:0.893\n",
      "I:210 Test-Err:0.405 Test-Acc:0.8034 Train-Err:0.336 Train-Acc:0.894\n",
      "I:220 Test-Err:0.402 Test-Acc:0.8067 Train-Err:0.325 Train-Acc:0.896\n",
      "I:230 Test-Err:0.404 Test-Acc:0.8091 Train-Err:0.321 Train-Acc:0.894\n",
      "I:240 Test-Err:0.415 Test-Acc:0.8091 Train-Err:0.332 Train-Acc:0.898\n",
      "I:250 Test-Err:0.395 Test-Acc:0.8182 Train-Err:0.320 Train-Acc:0.899\n",
      "I:260 Test-Err:0.390 Test-Acc:0.8204 Train-Err:0.321 Train-Acc:0.899\n",
      "I:270 Test-Err:0.382 Test-Acc:0.8194 Train-Err:0.312 Train-Acc:0.906\n",
      "I:280 Test-Err:0.396 Test-Acc:0.8208 Train-Err:0.317 Train-Acc:0.9\n",
      "I:290 Test-Err:0.399 Test-Acc:0.8181 Train-Err:0.301 Train-Acc:0.908"
     ]
    }
   ],
   "source": [
    "import numpy, sys\n",
    "np.random.seed(1)\n",
    "def relu(x):\n",
    "    return (x >= 0) * x # returns x if x > 0\n",
    "                        # returns 0 otherwise\n",
    "\n",
    "def relu2deriv(output):\n",
    "    return output >= 0 #returns 1 for input > 0\n",
    "\n",
    "alpha, iterations, hidden_size = (0.005, 300, 100)\n",
    "pixels_per_image, num_labels = (784, 10)\n",
    "\n",
    "weights_0_1 = 0.2*np.random.random((pixels_per_image,hidden_size)) - 0.1\n",
    "weights_1_2 = 0.2*np.random.random((hidden_size,num_labels)) - 0.1\n",
    "\n",
    "for j in range(iterations):\n",
    "    error, correct_cnt = (0.0,0)\n",
    "    for i in range(len(images)):\n",
    "        layer_0 = images[i:i+1]\n",
    "        layer_1 = relu(np.dot(layer_0,weights_0_1))\n",
    "        dropout_mask = np.random.randint(2, size=layer_1.shape)\n",
    "        layer_1 *= dropout_mask * 2\n",
    "        layer_2 = np.dot(layer_1,weights_1_2)\n",
    "\n",
    "        error += np.sum((labels[i:i+1] - layer_2) ** 2)\n",
    "        correct_cnt += int(np.argmax(layer_2) == np.argmax(labels[i:i+1]))\n",
    "        layer_2_delta = (labels[i:i+1] - layer_2)\n",
    "        layer_1_delta = layer_2_delta.dot(weights_1_2.T) * relu2deriv(layer_1)\n",
    "        layer_1_delta *= dropout_mask\n",
    "\n",
    "        weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)\n",
    "        weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)\n",
    "\n",
    "    if(j%10 == 0):\n",
    "        test_error = 0.0\n",
    "        test_correct_cnt = 0\n",
    "\n",
    "        for i in range(len(test_images)):\n",
    "            layer_0 = test_images[i:i+1]\n",
    "            layer_1 = relu(np.dot(layer_0,weights_0_1))\n",
    "            layer_2 = np.dot(layer_1, weights_1_2)\n",
    "\n",
    "            test_error += np.sum((test_labels[i:i+1] - layer_2) ** 2)\n",
    "            test_correct_cnt += int(np.argmax(layer_2) == np.argmax(test_labels[i:i+1]))\n",
    "\n",
    "        sys.stdout.write(\"\\n\" + \\\n",
    "                         \"I:\" + str(j) + \\\n",
    "                         \" Test-Err:\" + str(test_error/ float(len(test_images)))[0:5] +\\\n",
    "                         \" Test-Acc:\" + str(test_correct_cnt/ float(len(test_images)))+\\\n",
    "                         \" Train-Err:\" + str(error/ float(len(images)))[0:5] +\\\n",
    "                         \" Train-Acc:\" + str(correct_cnt/ float(len(images))))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Batch Gradient Descent"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "I:0 Test-Err:0.815 Test-Acc:0.3832 Train-Err:1.284 Train-Acc:0.165\n",
      "I:10 Test-Err:0.568 Test-Acc:0.7173 Train-Err:0.591 Train-Acc:0.672\n",
      "I:20 Test-Err:0.510 Test-Acc:0.7571 Train-Err:0.532 Train-Acc:0.729\n",
      "I:30 Test-Err:0.485 Test-Acc:0.7793 Train-Err:0.498 Train-Acc:0.754\n",
      "I:40 Test-Err:0.468 Test-Acc:0.7877 Train-Err:0.489 Train-Acc:0.749\n",
      "I:50 Test-Err:0.458 Test-Acc:0.793 Train-Err:0.468 Train-Acc:0.775\n",
      "I:60 Test-Err:0.452 Test-Acc:0.7995 Train-Err:0.452 Train-Acc:0.799\n",
      "I:70 Test-Err:0.446 Test-Acc:0.803 Train-Err:0.453 Train-Acc:0.792\n",
      "I:80 Test-Err:0.451 Test-Acc:0.7968 Train-Err:0.457 Train-Acc:0.786\n",
      "I:90 Test-Err:0.447 Test-Acc:0.795 Train-Err:0.454 Train-Acc:0.799\n",
      "I:100 Test-Err:0.448 Test-Acc:0.793 Train-Err:0.447 Train-Acc:0.796\n",
      "I:110 Test-Err:0.441 Test-Acc:0.7943 Train-Err:0.426 Train-Acc:0.816\n",
      "I:120 Test-Err:0.442 Test-Acc:0.7966 Train-Err:0.431 Train-Acc:0.813\n",
      "I:130 Test-Err:0.441 Test-Acc:0.7906 Train-Err:0.434 Train-Acc:0.816\n",
      "I:140 Test-Err:0.447 Test-Acc:0.7874 Train-Err:0.437 Train-Acc:0.822\n",
      "I:150 Test-Err:0.443 Test-Acc:0.7899 Train-Err:0.414 Train-Acc:0.823\n",
      "I:160 Test-Err:0.438 Test-Acc:0.797 Train-Err:0.427 Train-Acc:0.811\n",
      "I:170 Test-Err:0.440 Test-Acc:0.7884 Train-Err:0.418 Train-Acc:0.828\n",
      "I:180 Test-Err:0.436 Test-Acc:0.7935 Train-Err:0.407 Train-Acc:0.834\n",
      "I:190 Test-Err:0.434 Test-Acc:0.7935 Train-Err:0.410 Train-Acc:0.831\n",
      "I:200 Test-Err:0.435 Test-Acc:0.7972 Train-Err:0.416 Train-Acc:0.829\n",
      "I:210 Test-Err:0.434 Test-Acc:0.7923 Train-Err:0.409 Train-Acc:0.83\n",
      "I:220 Test-Err:0.433 Test-Acc:0.8032 Train-Err:0.396 Train-Acc:0.832\n",
      "I:230 Test-Err:0.431 Test-Acc:0.8036 Train-Err:0.393 Train-Acc:0.853\n",
      "I:240 Test-Err:0.430 Test-Acc:0.8047 Train-Err:0.397 Train-Acc:0.844\n",
      "I:250 Test-Err:0.429 Test-Acc:0.8028 Train-Err:0.386 Train-Acc:0.843\n",
      "I:260 Test-Err:0.431 Test-Acc:0.8038 Train-Err:0.394 Train-Acc:0.843\n",
      "I:270 Test-Err:0.428 Test-Acc:0.8014 Train-Err:0.384 Train-Acc:0.845\n",
      "I:280 Test-Err:0.430 Test-Acc:0.8067 Train-Err:0.401 Train-Acc:0.846\n",
      "I:290 Test-Err:0.428 Test-Acc:0.7975 Train-Err:0.383 Train-Acc:0.851"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "np.random.seed(1)\n",
    "\n",
    "def relu(x):\n",
    "    return (x >= 0) * x # returns x if x > 0\n",
    "\n",
    "def relu2deriv(output):\n",
    "    return output >= 0 # returns 1 for input > 0\n",
    "\n",
    "batch_size = 100\n",
    "alpha, iterations = (0.001, 300)\n",
    "pixels_per_image, num_labels, hidden_size = (784, 10, 100)\n",
    "\n",
    "weights_0_1 = 0.2*np.random.random((pixels_per_image,hidden_size)) - 0.1\n",
    "weights_1_2 = 0.2*np.random.random((hidden_size,num_labels)) - 0.1\n",
    "\n",
    "for j in range(iterations):\n",
    "    error, correct_cnt = (0.0, 0)\n",
    "    for i in range(int(len(images) / batch_size)):\n",
    "        batch_start, batch_end = ((i * batch_size),((i+1)*batch_size))\n",
    "\n",
    "        layer_0 = images[batch_start:batch_end]\n",
    "        layer_1 = relu(np.dot(layer_0,weights_0_1))\n",
    "        dropout_mask = np.random.randint(2,size=layer_1.shape)\n",
    "        layer_1 *= dropout_mask * 2\n",
    "        layer_2 = np.dot(layer_1,weights_1_2)\n",
    "\n",
    "        error += np.sum((labels[batch_start:batch_end] - layer_2) ** 2)\n",
    "        for k in range(batch_size):\n",
    "            correct_cnt += int(np.argmax(layer_2[k:k+1]) == np.argmax(labels[batch_start+k:batch_start+k+1]))\n",
    "\n",
    "            layer_2_delta = (labels[batch_start:batch_end]-layer_2)/batch_size\n",
    "            layer_1_delta = layer_2_delta.dot(weights_1_2.T)* relu2deriv(layer_1)\n",
    "            layer_1_delta *= dropout_mask\n",
    "\n",
    "            weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)\n",
    "            weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)\n",
    "            \n",
    "    if(j%10 == 0):\n",
    "        test_error = 0.0\n",
    "        test_correct_cnt = 0\n",
    "\n",
    "        for i in range(len(test_images)):\n",
    "            layer_0 = test_images[i:i+1]\n",
    "            layer_1 = relu(np.dot(layer_0,weights_0_1))\n",
    "            layer_2 = np.dot(layer_1, weights_1_2)\n",
    "\n",
    "            test_error += np.sum((test_labels[i:i+1] - layer_2) ** 2)\n",
    "            test_correct_cnt += int(np.argmax(layer_2) == np.argmax(test_labels[i:i+1]))\n",
    "\n",
    "        sys.stdout.write(\"\\n\" + \\\n",
    "                         \"I:\" + str(j) + \\\n",
    "                         \" Test-Err:\" + str(test_error/ float(len(test_images)))[0:5] +\\\n",
    "                         \" Test-Acc:\" + str(test_correct_cnt/ float(len(test_images)))+\\\n",
    "                         \" Train-Err:\" + str(error/ float(len(images)))[0:5] +\\\n",
    "                         \" Train-Acc:\" + str(correct_cnt/ float(len(images))))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
